{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f3d4f52b-e58d-43b9-bd3e-9a1c23d5ec7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "suppressMessages(library(ArchR))\n",
    "suppressMessages(library(patchwork))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "375fa5f7-e4b0-4109-a74b-2541bb7214a8",
   "metadata": {},
   "source": [
    "### Set parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f98caa1e-6cdb-4417-bbbe-69873992090a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "set.seed(42)\n",
    "addArchRThreads(threads = parallel::detectCores() - 2)\n",
    "addArchRGenome(\"hg38\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19e76c24-ad51-4e13-a074-474ce8fd7e9a",
   "metadata": {},
   "source": [
    "### Create Arrow files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e428b62-34e6-434c-943a-f80a6e43b3e9",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "## create Arrow files\n",
    "inputFiles <- c(\"CK166\" = \"../../Alignment/CK166/outs/fragments.tsv.gz\",\n",
    "                \"CK170\" = \"../../Alignment/CK170/outs/fragments.tsv.gz\",\n",
    "                \"CK174\" = \"../../Alignment/CK174/outs/fragments.tsv.gz\",\n",
    "                \"CK171\" = \"../../Alignment/CK171/outs/fragments.tsv.gz\",\n",
    "                \"CK169\" = \"../../Alignment/CK169/outs/fragments.tsv.gz\",\n",
    "                \"CK168\" = \"../../Alignment/CK168/outs/fragments.tsv.gz\",\n",
    "                \"CK173\" = \"../../Alignment/CK173/outs/fragments.tsv.gz\",\n",
    "                \"CK167\" = \"../../Alignment/CK167/outs/fragments.tsv.gz\",\n",
    "                \"CK353\" = \"../../Alignment/CK353/outs/fragments.tsv.gz\",\n",
    "                \"CK381\" = \"../../Alignment/CK381/outs/fragments.tsv.gz\",\n",
    "                \"CK336\" = \"../../Alignment/CK336/outs/fragments.tsv.gz\",\n",
    "                \"CK337\" = \"../../Alignment/CK337/outs/fragments.tsv.gz\",\n",
    "                \"CK338\" = \"../../Alignment/CK338/outs/fragments.tsv.gz\",\n",
    "                \"CK339\" = \"../../Alignment/CK339/outs/fragments.tsv.gz\",\n",
    "                \"CK340\" = \"../../Alignment/CK340/outs/fragments.tsv.gz\",\n",
    "                \"CK341\" = \"../../Alignment/CK341/outs/fragments.tsv.gz\",\n",
    "                \"CK380\" = \"../../Alignment/CK380/outs/fragments.tsv.gz\",\n",
    "                \"CK343\" = \"../../Alignment/CK343/outs/fragments.tsv.gz\",\n",
    "                \"CK354\" = \"../../Alignment/CK354/outs/fragments.tsv.gz\",\n",
    "                \"CK344\" = \"../../Alignment/CK344/outs/fragments.tsv.gz\",\n",
    "                \"CK385\" = \"../../Alignment/CK385/outs/fragments.tsv.gz\",\n",
    "                \"CK386\" = \"../../Alignment/CK386/outs/fragments.tsv.gz\",\n",
    "                \"CK346\" = \"../../Alignment/CK346/outs/fragments.tsv.gz\",\n",
    "                \"CK387\" = \"../../Alignment/CK387/outs/fragments.tsv.gz\",\n",
    "                \"CK388\" = \"../../Alignment/CK388/outs/fragments.tsv.gz\",\n",
    "                \"CK389\" = \"../../Alignment/CK389/outs/fragments.tsv.gz\",\n",
    "                \"CK355\" = \"../../Alignment/CK355/outs/fragments.tsv.gz\",\n",
    "                \"CK349\" = \"../../Alignment/CK349/outs/fragments.tsv.gz\",\n",
    "                \"CK382\" = \"../../Alignment/CK382/outs/fragments.tsv.gz\",\n",
    "                \"CK350\" = \"../../Alignment/CK350/outs/fragments.tsv.gz\",\n",
    "                \"CK351\" = \"../../Alignment/CK351/outs/fragments.tsv.gz\",\n",
    "                \"CK352\" = \"../../Alignment/CK352/outs/fragments.tsv.gz\",\n",
    "                \"CK383\" = \"../../Alignment/CK383/outs/fragments.tsv.gz\",\n",
    "                \"CK390\" = \"../../Alignment/CK390/outs/fragments.tsv.gz\",\n",
    "                \"CK391\" = \"../../Alignment/CK391/outs/fragments.tsv.gz\"\n",
    "               )\n",
    "\n",
    "minTSS <- 4\n",
    "minFrags <- 3000\n",
    "\n",
    "ArrowFiles <- createArrowFiles(\n",
    "  inputFiles = inputFiles,\n",
    "  sampleNames = names(inputFiles),\n",
    "  outputNames = names(inputFiles),\n",
    "  minTSS = minTSS, \n",
    "  minFrags = minFrags, \n",
    "  QCDir = \"../data/QualityControl\",\n",
    "  addTileMat = TRUE,\n",
    "  addGeneScoreMat = TRUE\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9835604-54f8-4215-9451-5d3a930948e3",
   "metadata": {},
   "source": [
    "### Visualize quality control"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fac96bb3-667e-4dcd-b787-1e149b810aa4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "## plot quality control\n",
    "plotlist <- lapply(names(inputFiles), function(sample){\n",
    "  input_filename <- sprintf(\"../data/QualityControl/%s/%s-Pre-Filter-Metadata.rds\", sample, sample)\n",
    "    \n",
    "    if(file.exists(input_filename)){\n",
    "        Metadata <- readRDS(input_filename)\n",
    "        \n",
    "        ggtitle <- sprintf(\"%s\",\n",
    "            paste0(sample, \"\\nnCells Pass Filter = \", sum(Metadata$Keep))\n",
    "          )\n",
    "        \n",
    "        gg <- ggPoint(\n",
    "          x = pmin(log10(Metadata$nFrags), 5) + rnorm(length(Metadata$nFrags), sd = 0.00001),\n",
    "          y = Metadata$TSSEnrichment + rnorm(length(Metadata$nFrags), sd = 0.00001), \n",
    "          colorDensity = TRUE,\n",
    "          xlim = c(2.5, 5),\n",
    "          ylim = c(0, max(Metadata$TSSEnrichment) * 1.05),\n",
    "          baseSize = 6,\n",
    "          continuousSet = \"sambaNight\",\n",
    "          xlabel = \"Log 10 (Unique Fragments)\",\n",
    "          ylabel = \"TSS Enrichment\",\n",
    "          title = ggtitle,\n",
    "          rastr = TRUE) + \n",
    "          geom_hline(yintercept=minTSS, lty = \"dashed\", size = 0.25) +\n",
    "          geom_vline(xintercept=log10(minFrags), lty = \"dashed\", size = 0.25) +\n",
    "            theme(plot.margin = margin(0.1, 0.1, 0.1, 0.1, unit = \"in\"))\n",
    "        \n",
    "        return(gg)  \n",
    "        }\n",
    "})\n",
    "               \n",
    "p <- patchwork::wrap_plots(plotlist, ncol = 7)\n",
    "                   \n",
    "options(repr.plot.width = 28, repr.height = 16)\n",
    "                   \n",
    "print(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31a3b3b4-8c9e-4495-829b-e42cea1b80f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Creating an ArchRProject\n",
    "proj <- ArchRProject(\n",
    "  ArrowFiles = ArrowFiles, \n",
    "  outputDirectory = \"../data/VisiumHeart\",\n",
    "  showLogo = FALSE,\n",
    "  copyArrows = TRUE #This is recommened so that you maintain an unaltered copy for later usage.\n",
    ")\n",
    "\n",
    "getAvailableMatrices(proj)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c642667-26ae-4bd8-83c5-0efd34fbb5c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inferring scATAC-seq Doublets with ArchR\n",
    "proj <- addDoubletScores(\n",
    "    input = proj,\n",
    "    k = 10, #Refers to how many cells near a \"pseudo-doublet\" to count.\n",
    "    knnMethod = \"UMAP\", #Refers to the embedding to use for nearest neighbor search with doublet projection.\n",
    "    LSIMethod = 1,\n",
    "    outDir = \"../data/DoubletScores\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "736b9c16-1907-4cbe-9394-9411a0ba9c14",
   "metadata": {},
   "outputs": [],
   "source": [
    "## remove doublets\n",
    "proj <- filterDoublets(proj)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "745386d5-197c-4de0-bf9f-f095229ca647",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Save data\n",
    "saveArchRProject(ArchRProj = proj, \n",
    "                 load = FALSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c1c8767-a1db-49ad-bf61-326e783b5838",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Session information\n",
    "\n",
    "sessionInfo()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d8c63bf-7048-411d-97e5-9fd1b810b4fa",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.0.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
